# Import packages
import pandas as pd
import numpy as np
import os
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import cufflinks as cf
cf.go_offline()
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(BASE_PATH, "..", "data")
# Read data
def get_PET_history(floor):
path = os.path.join(data_path, F"PET_{floor}F_data-20210401_20210831.csv")
PET_data = pd.read_csv(path)
PET_data["Time"] = pd.to_datetime(PET_data["Time"])
PET_data.sort_values(by="Time", inplace=True)
PET_data.set_index("Time", inplace=True)
PET_data = PET_data.loc[~PET_data.index.duplicated(keep="first")]
return PET_data
def get_FCU_history(floor):
path = os.path.join(data_path, F"FCU_{floor}F_data-20210201_20210831.csv")
FCU_data = pd.read_csv(path)
FCU_data["Time"] = pd.to_datetime(FCU_data["Time"])
FCU_data.sort_values(by="Time", inplace=True)
FCU_data.set_index("Time", inplace=True)
FCU_data = FCU_data.loc[~FCU_data.index.duplicated(keep='first')]
return FCU_data
def get_AHU_history(floor):
path = os.path.join(data_path, F"AHU_{floor}F_data-20210201_20210831.csv")
AHU_data = pd.read_csv(path)
AHU_data["Time"] = pd.to_datetime(AHU_data["Time"])
AHU_data.sort_values(by="Time", inplace=True)
AHU_data.set_index("Time", inplace=True)
AHU_data = AHU_data.loc[~AHU_data.index.duplicated(keep='first')]
return AHU_data
def get_CO2_history(floor):
path = os.path.join(data_path, F"CDS_{floor}F_data-20210201_20210831.csv")
CDS_data = pd.read_csv(path)
CDS_data["Time"] = pd.to_datetime(CDS_data["Time"])
CDS_data.sort_values(by="Time", inplace=True)
CDS_data.set_index("Time", inplace=True)
if floor == 8:
CDS_data = CDS_data.drop("CDS_108_8-CO2", axis=1) # deviation in this sensor data
CDS_data = CDS_data.loc[~CDS_data.index.duplicated(keep='first')]
CDS_data["Floor_CO2_mean"] = CDS_data.mean(axis=1)
return CDS_data.loc[:, ["Floor_CO2_mean"]]
def get_CWB_history():
path = os.path.join(data_path, "CWB_data-20210201_20210831.csv")
CWB_data = pd.read_csv(path)
CWB_data["Time"] = pd.to_datetime(CWB_data["Time"])
CWB_data.sort_values(by="Time", inplace=True)
CWB_data.set_index("Time", inplace=True)
CWB_data = CWB_data.loc[~CWB_data.index.duplicated(keep='first')]
return CWB_data
def merge_data(floor):
CWB_data = get_CWB_history()
FCU_data = get_FCU_history(floor)
AHU_data = get_AHU_history(floor)
CDS_data = get_CO2_history(floor)
PET_data = get_PET_history(floor)
ALL_data = pd.concat([CWB_data, FCU_data, AHU_data, CDS_data, PET_data], axis=1, join="outer")
    # Drop insignificant or noisy variables
ALL_data = ALL_data.drop(["RH", "GloblRad", "PAH-OA_FLOW", "PAH-FAN_RUN_CMD", "PAH-DMP_POS", "PET_Target"], axis=1)
return ALL_data.dropna()
floor = 8
display(merge_data(floor))
merge_data(floor).iplot()
| Time | Temperature | FCU-RA_T | FCU-SP | FCU-ST | FCU-WST | PAH-VFD_SPD | PAH-ST1 | PAH-SA_T | Floor_CO2_mean | PET_Indoor | SP_Target |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-04-01 00:00:00+08:00 | 24.500000 | 22.313543 | 23.1 | 0.0 | 3.0 | 0.0 | 0.0 | 22.400000 | 481.424007 | 22.520543 | 23.0 |
| 2021-04-01 00:10:00+08:00 | 24.483333 | 22.300000 | 23.1 | 0.0 | 3.0 | 0.0 | 0.0 | 22.400000 | 480.063036 | 22.518000 | 23.2 |
| 2021-04-01 00:20:00+08:00 | 24.466667 | 22.300000 | 23.1 | 0.0 | 3.0 | 0.0 | 0.0 | 22.400000 | 478.947236 | 22.517000 | 23.2 |
| 2021-04-01 00:30:00+08:00 | 24.450000 | 22.280525 | 23.1 | 0.0 | 3.0 | 0.0 | 0.0 | 22.400000 | 477.952797 | 22.501525 | 23.2 |
| 2021-04-01 00:40:00+08:00 | 24.433333 | 22.200000 | 23.1 | 0.0 | 3.0 | 0.0 | 0.0 | 22.400000 | 477.826316 | 22.444000 | 23.3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-08-31 23:10:00+08:00 | 29.350000 | 23.697488 | 25.0 | 0.0 | 3.0 | 0.0 | 0.0 | 24.000000 | 427.503254 | 23.921000 | 25.0 |
| 2021-08-31 23:20:00+08:00 | 29.300000 | 23.600000 | 25.0 | 0.0 | 3.0 | 0.0 | 0.0 | 24.068112 | 427.301354 | 23.920000 | 25.0 |
| 2021-08-31 23:30:00+08:00 | 29.250000 | 23.600000 | 25.0 | 0.0 | 3.0 | 0.0 | 0.0 | 24.149771 | 426.716231 | 23.920000 | 25.0 |
| 2021-08-31 23:40:00+08:00 | 29.200000 | 23.567579 | 25.0 | 0.0 | 3.0 | 0.0 | 0.0 | 24.254774 | 426.336100 | 23.920000 | 25.0 |
| 2021-08-31 23:50:00+08:00 | 29.150000 | 23.500000 | 25.0 | 0.0 | 3.0 | 0.0 | 0.0 | 24.300000 | 426.310519 | 23.919000 | 25.0 |
21960 rows × 11 columns
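# Side note: the five get_*_history readers above all follow the same load / parse-Time / sort /
# de-duplicate pattern. A minimal refactoring sketch (read_history is a hypothetical helper,
# not part of the original pipeline; it assumes every CSV uses the same "Time" column):
def read_history(filename):
    df = pd.read_csv(os.path.join(data_path, filename))
    df["Time"] = pd.to_datetime(df["Time"])
    df.sort_values(by="Time", inplace=True)
    df.set_index("Time", inplace=True)
    return df.loc[~df.index.duplicated(keep="first")]
# e.g. read_history(F"FCU_{floor}F_data-20210201_20210831.csv") reproduces get_FCU_history(floor)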
# Create time-series features (this is just a sample, not the exact structure used in the field)
def get_train_feature(target, floor, future_step=1):
    # Prepare the features (you can also add more features if you'd like to try other models)
data = merge_data(floor)
# data.insert(loc=0, column="DoY", value=(data.index.dayofyear).values) # seems not significant
data.insert(loc=1, column="Hour", value=(data.index.hour).values + (pd.Series(data.index).dt.minute).values/60)
data.insert(loc=2, column="DoW", value=(data.index.dayofweek).values)
data.insert(loc=3, column="HoW", value=(data["Hour"] + 24 * data.index.dayofweek).values)
    # Feature set: add lag features for future prediction (the situation at [t-1] leads to the outcome at [t])
for col in data.loc[:, ~data.columns.str.contains(target)].columns:
data.loc[:, F"{col}-lag"] = data.loc[:, col].shift(future_step)
    # the target itself can also be shifted as a lag feature (but it seems not significant)
# data.loc[:, F"{target}-lag"] = data.loc[:, target].shift(future_step)
return data.dropna()
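# A quick toy illustration of what the shift above does (hypothetical values, only to show that
# the lag column at time [t] carries the value observed future_step rows earlier):
toy = pd.DataFrame({"x": [1, 2, 3, 4]}, index=pd.date_range("2021-04-01", periods=4, freq="10T"))
toy["x-lag"] = toy["x"].shift(1)  # first row becomes NaN and is dropped by dropna(), as above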
# Scaling the data may help
def data_scale(dataset, scale_type="MinMax"):
if scale_type =="MinMax":
data_min = dataset.min()
data_max = dataset.max()
scaled_df = (dataset - data_min) / (data_max - data_min)
pars = {"data_min": data_min, "data_max": data_max}
return scaled_df, pars
if scale_type =="normalize":
data_mean = dataset.mean()
data_std = dataset.std()
normalized_df = (dataset - data_mean) / data_std
pars = {"data_mean": data_mean, "data_std": data_std}
return normalized_df, pars
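# A matching inverse-transform sketch (assumption: MinMax scaling was used). It converts a scaled
# column back to real units, which is what the manual un-scaling further below also does:
def data_unscale(scaled_col, pars, col):
    return scaled_col * (pars["data_max"][col] - pars["data_min"][col]) + pars["data_min"][col]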
# Split train, valid and test data
def generator(data, start, end):
X = data.loc[start:end, ~data.columns.str.contains(target)]
Y = data.loc[start:end, data.columns.str.contains(target)]
return X, Y
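# Note: generator reads the global "target" defined just below, and the start/end strings use
# pandas partial-string indexing on the DatetimeIndex, so both endpoint days are fully included,
# e.g. generator(data, "2021-04-01", "2021-06-30") keeps every 10-min record of Jun 30 as well.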
floor = 8
target = "SP_Target"
future_step = 6 * 1  # timestep is 10 min; 6 timesteps give a 1 hr lag feature
# data = get_train_feature(target, floor, future_step)  # can also try training on unscaled data
data, pars = data_scale(get_train_feature(target, floor, future_step))
# train data
start = "2021-04-01"
end = "2021-06-30"
_train = generator(data, start, end)
train_x = _train[0]
train_y = _train[1]
# valid data
start = "2021-05-31"
end = "2021-07-31"
_val = generator(data, start, end)
val_x = _val[0]
val_y = _val[1]
# test data
start = "2021-06-30"
end = "2021-08-31"
_test = generator(data, start, end)
test_x = _test[0]
test_y = _test[1]
# all data
start = "2021-04-01"
end = "2021-08-31"
_all = generator(data, start, end)
all_x = _all[0]
all_y = _all[1]
display(all_x)
display(all_y)
| Time | Temperature | Hour | DoW | HoW | FCU-RA_T | FCU-SP | FCU-ST | FCU-WST | PAH-VFD_SPD | PAH-ST1 | ... | HoW-lag | FCU-RA_T-lag | FCU-SP-lag | FCU-ST-lag | FCU-WST-lag | PAH-VFD_SPD-lag | PAH-ST1-lag | PAH-SA_T-lag | Floor_CO2_mean-lag | PET_Indoor-lag |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-04-01 01:00:00+08:00 | 0.383178 | 0.041958 | 0.500000 | 0.434955 | 0.145985 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.428997 | 0.166704 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | 0.664688 | 0.708873 | 0.522040 |
| 2021-04-01 01:10:00+08:00 | 0.383956 | 0.048951 | 0.500000 | 0.435948 | 0.145985 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.429990 | 0.164233 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | 0.664688 | 0.706869 | 0.521720 |
| 2021-04-01 01:20:00+08:00 | 0.384735 | 0.055944 | 0.500000 | 0.436941 | 0.145985 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.430983 | 0.164233 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | 0.664688 | 0.705226 | 0.521594 |
| 2021-04-01 01:30:00+08:00 | 0.385514 | 0.062937 | 0.500000 | 0.437934 | 0.145985 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.431976 | 0.160679 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | 0.664688 | 0.703762 | 0.519643 |
| 2021-04-01 01:40:00+08:00 | 0.386293 | 0.069930 | 0.500000 | 0.438928 | 0.145985 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.432969 | 0.145985 | 0.183333 | 0.0 | 1.0 | 0.0 | 0.0 | 0.664688 | 0.703576 | 0.512391 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-08-31 23:10:00+08:00 | 0.614486 | 0.972028 | 0.166667 | 0.281033 | 0.419247 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.275074 | 0.528267 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | 0.685993 | 0.631985 | 0.734818 |
| 2021-08-31 23:20:00+08:00 | 0.612150 | 0.979021 | 0.166667 | 0.282026 | 0.401458 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.276068 | 0.499538 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | 0.693065 | 0.631117 | 0.719315 |
| 2021-08-31 23:30:00+08:00 | 0.609813 | 0.986014 | 0.166667 | 0.283019 | 0.401458 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.277061 | 0.476726 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | 0.697126 | 0.630624 | 0.708473 |
| 2021-08-31 23:40:00+08:00 | 0.607477 | 0.993007 | 0.166667 | 0.284012 | 0.395541 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.278054 | 0.456350 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | 0.702158 | 0.630276 | 0.698809 |
| 2021-08-31 23:50:00+08:00 | 0.605140 | 1.000000 | 0.166667 | 0.285005 | 0.383209 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.279047 | 0.440235 | 0.500000 | 0.0 | 1.0 | 0.0 | 0.0 | 0.706231 | 0.630140 | 0.698833 |
21954 rows × 26 columns
| Time | SP_Target |
|---|---|
| 2021-04-01 01:00:00+08:00 | 0.366667 |
| 2021-04-01 01:10:00+08:00 | 0.366667 |
| 2021-04-01 01:20:00+08:00 | 0.366667 |
| 2021-04-01 01:30:00+08:00 | 0.366667 |
| 2021-04-01 01:40:00+08:00 | 0.366667 |
| ... | ... |
| 2021-08-31 23:10:00+08:00 | 0.666667 |
| 2021-08-31 23:20:00+08:00 | 0.666667 |
| 2021-08-31 23:30:00+08:00 | 0.666667 |
| 2021-08-31 23:40:00+08:00 | 0.666667 |
| 2021-08-31 23:50:00+08:00 | 0.666667 |
21954 rows × 1 columns
# Train the model (the model here is one optimized example)
model_summary = pd.DataFrame()
Epochs = 30
Batch = 6 * 24 * 7  # 1008 samples per batch = one week of 10-min timesteps
tf.random.set_seed(221)
model = Sequential()
model.add(Dense(units=8, input_dim=train_x.shape[1], activation="relu"))
model.add(Dense(units=16, activation="selu"))
model.add(Dense(units=32, activation=LeakyReLU()))
model.add(Dense(units=16, activation="selu"))
model.add(Dense(units=8, activation="relu"))
model.add(Dense(units=1))
opt = keras.optimizers.Adam()
model.compile(loss="mae", optimizer=opt, metrics=["mse"])
model.fit(train_x, train_y, epochs=Epochs, batch_size=Batch, validation_data=(val_x, val_y))
y_pred = model.predict(test_x)
test_y["pred"] = y_pred[:, 0]
test_y[target] = test_y[target]
test_y["pred"] = test_y["pred"]
fig = go.Figure()
fig.add_trace(go.Scatter(name="real", x=test_y.index, y=test_y[target]))
fig.add_trace(go.Scatter(name="pred", x=test_y.index, y=test_y["pred"]))
fig.show()
test_y = test_y.dropna()
print("r2: ", round(r2_score(test_y[target], test_y["pred"]), 2))
print("MAE: ", round(mae(test_y[target], test_y["pred"]), 2))
print("RMSE: ", round(mse(test_y[target], test_y["pred"]), 2))
model.summary()
Epoch 1/30  13/13 [==============================] - 1s 14ms/step - loss: 0.1130 - mse: 0.0227 - val_loss: 0.0777 - val_mse: 0.0092
Epoch 2/30  13/13 [==============================] - 0s 4ms/step - loss: 0.0899 - mse: 0.0159 - val_loss: 0.0545 - val_mse: 0.0066
Epoch 3/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0819 - mse: 0.0144 - val_loss: 0.0505 - val_mse: 0.0059
Epoch 4/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0765 - mse: 0.0133 - val_loss: 0.0484 - val_mse: 0.0056
Epoch 5/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0722 - mse: 0.0122 - val_loss: 0.0471 - val_mse: 0.0052
Epoch 6/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0685 - mse: 0.0113 - val_loss: 0.0462 - val_mse: 0.0048
Epoch 7/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0656 - mse: 0.0106 - val_loss: 0.0441 - val_mse: 0.0044
Epoch 8/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0633 - mse: 0.0101 - val_loss: 0.0424 - val_mse: 0.0041
Epoch 9/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0618 - mse: 0.0099 - val_loss: 0.0411 - val_mse: 0.0038
Epoch 10/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0607 - mse: 0.0095 - val_loss: 0.0401 - val_mse: 0.0037
Epoch 11/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0598 - mse: 0.0094 - val_loss: 0.0399 - val_mse: 0.0036
Epoch 12/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0589 - mse: 0.0094 - val_loss: 0.0397 - val_mse: 0.0036
Epoch 13/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0580 - mse: 0.0092 - val_loss: 0.0375 - val_mse: 0.0033
Epoch 14/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0569 - mse: 0.0091 - val_loss: 0.0372 - val_mse: 0.0032
Epoch 15/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0564 - mse: 0.0090 - val_loss: 0.0369 - val_mse: 0.0030
Epoch 16/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0562 - mse: 0.0089 - val_loss: 0.0361 - val_mse: 0.0031
Epoch 17/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0554 - mse: 0.0088 - val_loss: 0.0348 - val_mse: 0.0029
Epoch 18/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0553 - mse: 0.0088 - val_loss: 0.0342 - val_mse: 0.0028
Epoch 19/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0546 - mse: 0.0087 - val_loss: 0.0350 - val_mse: 0.0029
Epoch 20/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0545 - mse: 0.0087 - val_loss: 0.0329 - val_mse: 0.0027
Epoch 21/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0537 - mse: 0.0086 - val_loss: 0.0337 - val_mse: 0.0027
Epoch 22/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0533 - mse: 0.0086 - val_loss: 0.0327 - val_mse: 0.0027
Epoch 23/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0529 - mse: 0.0085 - val_loss: 0.0339 - val_mse: 0.0027
Epoch 24/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0531 - mse: 0.0085 - val_loss: 0.0318 - val_mse: 0.0026
Epoch 25/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0525 - mse: 0.0084 - val_loss: 0.0314 - val_mse: 0.0026
Epoch 26/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0527 - mse: 0.0084 - val_loss: 0.0311 - val_mse: 0.0026
Epoch 27/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0518 - mse: 0.0083 - val_loss: 0.0303 - val_mse: 0.0025
Epoch 28/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0530 - mse: 0.0083 - val_loss: 0.0341 - val_mse: 0.0026
Epoch 29/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0532 - mse: 0.0082 - val_loss: 0.0308 - val_mse: 0.0025
Epoch 30/30  13/13 [==============================] - 0s 3ms/step - loss: 0.0509 - mse: 0.0081 - val_loss: 0.0298 - val_mse: 0.0024
r2:  0.64
MAE:  0.04
MSE:  0.0
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 8)                 216
_________________________________________________________________
dense_1 (Dense)              (None, 16)                144
_________________________________________________________________
dense_2 (Dense)              (None, 32)                544
_________________________________________________________________
dense_3 (Dense)              (None, 16)                528
_________________________________________________________________
dense_4 (Dense)              (None, 8)                 136
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 9
=================================================================
Total params: 1,577
Trainable params: 1,577
Non-trainable params: 0
_________________________________________________________________
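# Optional: instead of a fixed 30 epochs, training can stop automatically once val_loss stops
# improving. A hedged sketch using keras.callbacks.EarlyStopping (same model and data as above):
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
# model.fit(train_x, train_y, epochs=100, batch_size=Batch,
#           validation_data=(val_x, val_y), callbacks=[early_stop])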
plot_df = test_y * (pars["data_max"][target] - pars["data_min"][target]) + pars["data_min"][target]
plot_df.iplot(title=F"{target}-real vs prediction")
# the model only has to be precise at the times we actually need it
# for such a special application, schedule rules can be used to refine the predictive strategy
plot_df["pred"] = np.where(np.logical_or(plot_df.index.dayofweek>4, np.logical_or(plot_df.index.hour<7, plot_df.index.hour>21)), 25, plot_df["pred"])
plot_df.iplot(title=F"{target}-real vs prediction")
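# The same schedule rule written as a small named helper for readability (apply_schedule_rule is a
# hypothetical sketch; the off-hours setpoint of 25 and the 07-21 weekday window come from the rule above):
def apply_schedule_rule(df, off_value=25):
    off_hours = (df.index.dayofweek > 4) | (df.index.hour < 7) | (df.index.hour > 21)
    out = df.copy()
    out.loc[off_hours, "pred"] = off_value
    return out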
# the HVAC setpoint-temperature control strategy ensures indoor thermal comfort
print("r2: ", round(r2_score(plot_df[target], plot_df["pred"]), 2))
print("MAE: ", round(mae(plot_df[target], plot_df["pred"]), 2))
print("RMSE: ", round(mse(plot_df[target], plot_df["pred"]), 2))
r2:  0.84
MAE:  0.13
MSE:  0.07
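# The MSE printed above is in squared units; if a true RMSE is preferred, a minimal sketch:
# print("RMSE: ", round(np.sqrt(mse(plot_df[target], plot_df["pred"])), 2))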